pops<-c("PWS","TB","SS")
covs<-data.frame()
Variance<-data.frame()
winsize<-c("50k","75k","100k","250k")
for (w in 1: length(winsize)){
covs<-data.frame()
for (p in 1: length(pops)){
#covariance output file
cov<-read.csv(paste0("~/Projects/Pacherring_Vincent/MD7000/3Pops_maf05_temp_cov_matrix_",pops[p],"_",winsize[w],".csv"))
cov<-cov[,-1]
ci<-read.csv(paste0("~/Projects/Pacherring_Vincent/MD7000/3Pops_maf05_",pops[p],"_Cov_CIs_bootstrap5000_",winsize[w],"window.csv"))
ci<-ci[,-1]
#reshape the matrix
mat1<-cov[1:3,]
mat2<-cov[4:6,]
covdf<-data.frame()
k=1
for (i in 1:nrow(mat1)){
for (j in 1:ncol(mat1)){
covdf[k,1]<-mat2[i,j]
covdf[k,2]<-mat1[i,j]
k=k+1
}
}
colnames(covdf)<-c("label","value")
covdf$value<-as.numeric(covdf$value)
covar<-covdf[grep("cov",covdf$label),]
vars<-covdf[grep("var",covdf$label),]
#remove the redundant values
if (pops[p]!="SS") covar<-covar[!duplicated(covar[, 2]),]
if (pops[p]=="SS") covar<-covar[c(1,2,4),]
#assign the starting time period and covering period values
covar$year<-c(1,2,2)
covar$series<-c("1991","1991","1996")
vars$year<-c(1,2,2)
vars$series<-c("1991","1991","1996")
#assign population name
covar$location<-pops[p]
vars$location<-pops[p]
#attach ci info
covar$ci_l<-c(ci[1,2], ci[1,3],ci[2,3])
covar$ci_u<-c(ci[4,2], ci[4,3],ci[5,3])
#combine in to one matrix
covs<-rbind(covs, covar)
Variance<-rbind(Variance, vars)
}
covs$ci_l<-as.numeric(covs$ci_l)
covs$ci_u<-as.numeric(covs$ci_u)
ggplot(data=covs, aes(x=year, y=value, color=location, shape=series, group=interaction(location, series)))+
geom_point(size=3, position=position_dodge(width = 0.1,preserve ="total"))+
#geom_errorbar(data=covs, aes(x=year, y=value, ymin=ci_l, ymax=ci_u), width=.2, size=.2, position=position_dodge(width = 0.1,preserve ="total"))+
geom_line(data=covs, aes(x=year, y=value,color=location, group=interaction(location, series)), position=position_dodge(width = 0.1,preserve ="total"))+
ylab("Covariance")+xlab('')+theme_classic()+
theme(axis.text.x = element_blank(),legend.title = element_blank())+
geom_hline(yintercept = 0,color="gray70", size=0.3)+
geom_errorbar(aes(ymin=ci_l, ymax=ci_u), width=.2, size=.2, position=position_dodge(width = 0.1,preserve ="total"))+
scale_shape_manual(values=c(16,17),labels=c("1991-","1996-"))+
scale_x_continuous(breaks = c(1,2))
ggsave(paste0("../Output/COV/3Pops_Cov_overtime_CI_",winsize[w],".window.png"),width = 4.7, height = 3, dpi=300)
} #Find the regions with a high temporal covariance
pops<-c("PWS","TB","SS")
winsize<-c("50k","75k","100k")
evens<-paste0("chr",seq(2,26, by=2))
cov.list<-list()
covs_all<-list()
k=1
for (p in 1: length(pops)){
pop<-pops[p]
for (i in 1: length(winsize)){
iv<-read.csv(paste0("~/Projects/Pacherring_Vincent/MD7000/3pops_intervals_",winsize[i],"window.csv"), row.names = 1)
if (p==3) {
cov23<-read.csv(paste0("~/Projects/Pacherring_Vincent/MD7000/",pop,"_cov23_2017-2006_2006-1996_3Pops_",winsize[i],"window.csv"), header = F)
covs<-cbind(iv, cov23)
colnames(covs)[4]<-c("cov23")
covs$index=1:nrow(covs)
covs$color<-"col1"
covs$color[covs$chrom %in% evens]<-"col2"
covs[sapply(covs, is.infinite)] <- NA
covs[sapply(covs, is.nan)] <- NA
cov.list[[k]]<-covs
names(cov.list)[k]<-paste0(pop,"_",winsize[i])
k=k+1
y<-min(covs$cov23, na.rm=T)
ymin<-ifelse (y<=-0.1,-0.1, y)
ymax<-max(covs$cov23, na.rm=T)
ggplot(covs, aes(x=index, y=cov23, color=color))+
geom_point(size=1, alpha=0.5)+
theme_classic()+
ylim(ymin,ymax)+
scale_color_manual(values=c("gray70","steelblue"), guide="none")+
ylab("Covariance")+xlab('Chromosome')+
theme(axis.text.x = element_blank())+
ggtitle(paste0(pop," ", winsize[i]," window"))
#ggsave(paste0("../Output/COV/3Pops.",pop,"_tempCovs_acrossGenome_",winsize[i], "Window.png"), width = 8, height = 2.7, dpi=300)
}
else {
cov12<-read.csv(paste0("~/Projects/Pacherring_Vincent/MD7000/",pop,"_cov12_1996-1991_2006-1996_3Pops_",winsize[i],"window.csv"), header = F)
cov23<-read.csv(paste0("~/Projects/Pacherring_Vincent/MD7000/",pop,"_cov23_2017-2006_2006-1996_3Pops_",winsize[i],"window.csv"), header = F)
cov13<-read.csv(paste0("~/Projects/Pacherring_Vincent/MD7000/",pop,"_cov13_2017-2006_1996-1991_3Pops_",winsize[i],"window.csv"), header = F)
covs<-cbind(iv, cov12, cov23,cov13)
colnames(covs)[4:6]<-c("cov12","cov23","cov13")
covs$index=1:nrow(covs)
covs$color<-"col1"
covs$color[covs$chrom %in% evens]<-"col2"
covs[sapply(covs, is.infinite)] <- NA
covs[sapply(covs, is.nan)] <- NA
cov.list[[k]]<-covs
names(cov.list)[k]<-paste0(pop,"_",winsize[i])
k=k+1
covsm<-melt(covs[,c("index","color","cov12","cov23","cov13")], id.vars = c("index", "color"))
ymax<-max(covsm$value, na.rm=T)
y<-min(covsm$value, na.rm=T)
ymin<-ifelse (y<=-0.1,-0.1, y)
ggplot(covsm, aes(x=index, y=value, color=color))+
facet_wrap(~variable, nrow=3)+
geom_point(size=1, alpha=0.5)+
theme_classic()+
ylim(ymin,ymax)+
scale_color_manual(values=c("gray70","steelblue"), guide="none")+
ylab("Covariance")+xlab('Chromosome')+
theme(axis.text.x = element_blank())+
ggtitle(paste0(pop," ", winsize[i]," window"))
#ggsave(paste0("../Output/COV/3Pops.",pop,"_tempCovs_acrossGenome_",winsize[i], "Window.png"), width = 8, height = 8, dpi=300)
}
}
}
#find how outliers overlap between different windows
cov12<-data.frame()
cov23<-data.frame()
cov13<-data.frame()
for (i in 1:length(cov.list)){
if (grepl("PWS",names(cov.list)[i])|grepl("TB",names(cov.list)[i])){
covs<-cov.list[[i]]
pop<-gsub("_.+",'', names(cov.list)[i])
win<-gsub(paste0(pop,"_"), '', names(cov.list)[i])
covs<-covs[order(covs$cov12, decreasing=T),]
n<-ceiling(nrow(covs)*0.01) #top1% region
covs12_top<-covs[1:n,c(1:4)]
covs12_top<-covs12_top[order(covs12_top$chrom, covs12_top$start),]
covs12_top$window<-win
covs12_top$pop<-pop
cov12<-rbind(cov12, covs12_top)
covs<-covs[order(covs$cov13, decreasing=T),]
n<-ceiling(nrow(covs)*0.01) #top1% region
covs13_top<-covs[1:n,c(1:3,6)]
covs13_top<-covs13_top[order(covs13_top$chrom, covs13_top$start),]
covs13_top$window<-win
covs13_top$pop<-pop
cov13<-rbind(cov13, covs13_top)
covs<-covs[order(covs$cov23, decreasing=T),]
n<-ceiling(nrow(covs)*0.01) #top1% region
covs23_top<-covs[1:n,c(1:3,5)]
covs23_top<-covs23_top[order(covs23_top$chrom, covs23_top$start),]
covs23_top$window<-win
covs23_top$pop<-pop
cov23<-rbind(cov23, covs23_top)
}
if (grepl("SS",names(cov.list)[i])){
covs<-cov.list[[i]]
pop<-gsub("_.+",'', names(cov.list)[i])
win<-gsub(paste0(pop,"_"), '', names(cov.list)[i])
covs<-covs[order(covs$cov23, decreasing=T),]
n<-ceiling(nrow(covs)*0.01) #top1% region
covs23_top<-covs[1:n,c(1:4)]
covs23_top<-covs23_top[order(covs23_top$chrom, covs23_top$start),]
covs23_top$window<-win
covs23_top$pop<-pop
cov23<-rbind(cov23, covs23_top)
}
}
write.csv(cov12, "../Output/COV/3pops_top1percent_outlier_regions.cov12_new.csv",row.names = F)
write.csv(cov23, "../Output/COV/3pops_top1percent_outlier_regions.cov23_new.csv",row.names = F)
write.csv(cov13, "../Output/COV/3pops_top1percent_outlier_regions.cov13_new.csv",row.names = F)#Create plots with different colors for outliers
#for COV12 and COV13 for TB and PWS (100K)
cv<-c("cov12","cov13","cov23")
winsize<-c("50k","75k","100k")
for (i in 1:length(cv)){
if (i==1|i==2){
for (w in 1: length(winsize)){
#PWS
df1<-cov.list[[paste0("PWS_", winsize[w])]]
df1<-df1[order(df1[,cv[i]], decreasing=T),]
n<-ceiling(nrow(df1)*0.01) #top1% region
df1$top1<-"N"
df1$top1[1:n]<-"PWS"
#tb
df2<-cov.list[[paste0("TB_", winsize[w])]]
df2<-df2[order(df2[,cv[i]], decreasing=T),]
df2$top1<-"N"
df2$top1[1:n]<-"TB"
co<-rbind(df1, df2)
co$chrom<-factor(co$chrom, levels=paste0("chr", 1:26))
co$top1<-factor(co$top1, levels=c("PWS","TB","N"))
colnames(co)[which(colnames(co)==cv[i])]<-"cov"
ymax<-max(co$cov, na.rm=T)
ggplot(co, aes(x=start/1000000, y=cov, color=top1))+
geom_point(size=0.5)+
facet_wrap(~chrom, ncol=4)+
theme_classic()+ylim(-0.1,ymax)+
scale_color_manual(values=c("deeppink","orange" ,"#ADD8E680"), labels=c("PWS", "TB", ""))+
ylab("Covariance")+xlab('Postion (Mb)')+
ggtitle(paste0(winsize[w]," window ",cv[i]))+
scale_x_continuous(labels = comma)+
guides(color = guide_legend(override.aes = list(color=c("deeppink","orange","white")), title=element_text("Top 1%")))
ggsave(paste0("../Output/COV/COVscan_3pop/3Pops.",cv[i],"_perChrom_",winsize[w], "Window_Outliers.png"), width = 10, height = 8, dpi=300)
}
}
if (i==3){
for (w in 1: length(winsize)){
#pws
df1<-cov.list[[paste0("PWS_", winsize[w])]]
df1<-df1[,c("chrom","start","end","cov23")]
df1<-df1[order(df1$cov23, decreasing=T),]
n<-ceiling(nrow(df1)*0.01) #top1% region
df1$top1<-"N"
df1$top1[1:n]<-"PWS"
#tb
df2<-cov.list[[paste0("TB_", winsize[w])]]
df2<-df2[,c("chrom","start","end","cov23")]
df2<-df2[order(df2$cov23, decreasing=T),]
df2$top1<-"N"
df2$top1[1:n]<-"TB"
#ss
df3<-cov.list[[paste0("SS_", winsize[w])]]
df3<-df3[,c("chrom","start","end","cov23")]
df3<-df3[order(df3$cov23, decreasing=T),]
df3$top1<-"N"
df3$top1[1:n]<-"SS"
co<-rbind(df1,df2,df3)
co$chrom<-factor(co$chrom, levels=paste0("chr", 1:26))
co$top1<-factor(co$top1, levels=c("PWS","TB","SS","N"))
ymax<-max(co$cov23, na.rm=T)
ggplot(co, aes(x=start/1000000, y=cov23, color=top1))+
geom_point(size=0.5)+
facet_wrap(~chrom, ncol=4)+
theme_classic()+ylim(-0.1,ymax)+
ylab("Covariance")+xlab('Postion (Mb)')+
ggtitle(paste0(winsize[w]," window ",cv[i]))+
scale_x_continuous(labels = comma)+
#scale_color_discrete(breaks=c("PWS","SS","TB"))+
scale_color_manual(values=c("deeppink","orange",gre,"#ADD8E666"), labels=c("PWS","TB","SS", ""))+
guides(color = guide_legend(override.aes = list(color=c("deeppink","orange",gre, "white")),title=element_text("Top 1% outliers")))
ggsave(paste0("../Output/COV/COVscan_3pop/3Pops.cov23_3Pops_perChrom_",winsize[w], "Window_Outliers.png"), width = 10, height = 9, dpi=300)
}
}
}
cv<-c("cov12","cov13","cov23")
for (i in 1:length(cv)){
if (i==1|i==2){
df1<-cov.list[["PWS_100k"]]
df1<-df1[order(df1[,cv[i]], decreasing=T),]
n<-ceiling(nrow(df1)*0.01) #top1% region
df1$top1<-"N"
df1$top1[1:n]<-"PWS"
df2<-cov.list[["TB_100k"]]
df2<-df2[order(df2[,cv[i]], decreasing=T),]
df2$top1<-"N"
df2$top1[1:n]<-"TB"
co<-rbind(df1, df2)
co$chrom<-factor(co$chrom, levels=paste0("chr", 1:26))
colnames(co)[which(colnames(co)==cv[i])]<-"cov"
#assgin colors
co$top1<-apply(co, 1, function(x) {ifelse (x['top1']=="N", x['color'], x['top1'])} )
co$top1<-factor(co$top1, levels=c("PWS","TB","col1","col2"))
#count the number of sites per chromosomes
poss<-data.frame(chr=paste0("chr",1:26))
k=1
for (j in 1:26){
df<-df1[df1$chr==paste0("chr",j),]
poss$start[j]<-k
poss$end[j]<-k+nrow(df)-1
k=k+nrow(df)
}
poss$x<-poss$start+(poss$end-poss$start)/2
ymax<-max(co$cov, na.rm=T)
ggplot(co, aes(x=index, y=cov, color=top1))+
geom_point(size=0.5)+
theme_classic()+ylim(-0.1,ymax)+
scale_color_manual(values=c("#FF3293B3","#0433FFB3" ,"#ADD8E680","#C0C0C088"), labels=c("PWS", "TB", "",""))+
ylab("Covariance")+
ggtitle(paste0(" 100k window ",cv[i]))+
guides(color = guide_legend(override.aes = list(color=c("deeppink","#0433FF","white","white"), size=2), title=element_text("Outlier (1%)", size=10)))+
scale_x_continuous(name="Chromosome", breaks=poss$x, labels=1:26)
ggsave(paste0("../Output/COV/COVscan_3pop/3Pops.",cv[i],"_100k_Window_Outliers.png"), width = 10, height = 3.5, dpi=300)
}}
if (i==3){
#pws
df1<-cov.list[["PWS_100k"]]
df1<-df1[,c("chrom","start","end","cov23","index","color")]
df1<-df1[order(df1$cov23, decreasing=T),]
n<-ceiling(nrow(df1)*0.01) #top1% region
df1$top1<-df1$color
df1$top1[1:n]<-"PWS"
#tb
df2<-cov.list[["TB_100k"]]
df2<-df2[,c("chrom","start","end","cov23","index","color")]
df2<-df2[order(df2$cov23, decreasing=T),]
df2$top1<-df2$color
df2$top1[1:n]<-"TB"
#ss
df3<-cov.list[["SS_100k"]]
df3<-df3[,c("chrom","start","end","cov23","index","color")]
df3<-df3[order(df3$cov23, decreasing=T),]
df3$top1<-df3$color
df3$top1[1:n]<-"SS"
co<-rbind(df1,df2,df3)
co$top1<-factor(co$top1, levels=c("PWS","TB","SS","col1","col2"))
#count the number of sites per chromosomes
poss<-data.frame(chr=paste0("chr",1:26))
k=1
for (j in 1:26){
df<-df1[df1$chr==paste0("chr",j),]
poss$start[j]<-k
poss$end[j]<-k+nrow(df)-1
k=k+nrow(df)
}
poss$x<-poss$start+(poss$end-poss$start)/2
ymax<-max(co$cov, na.rm=T)
ggplot(co, aes(x=index, y=cov23, color=top1))+
geom_point(size=0.5)+
theme_classic()+ylim(-0.1,ymax)+
scale_color_manual(values=c("#FF3293B3","#0433FFB3","#008F00B3" ,"#ADD8E680","#C0C0C088"), labels=c("PWS", "TB","SS", "",""))+
ylab("Covariance")+
ggtitle(paste0(" 100k window ",cv[i]))+
guides(color = guide_legend(override.aes = list(color=c("deeppink","#0433FF","#008F00","white","white"), size=2), title=element_text("Outlier (1%)")))+
scale_x_continuous(name="Chromosome", breaks=poss$x, labels=1:26)+
theme(legend.title = element_text(size=10))
ggsave(paste0("../Output/COV/COVscan_3pop/3Pops.",cv[i],"_100k_Window_Outliers.png"), width = 10, height = 3.5, dpi=300)
}
}
}
## Plot 3 time periods together for PWS and TB
Cov<-data.frame()
for (i in 1:length(cv)){
df1<-cov.list[["PWS_100k"]]
df1<-df1[order(df1[,cv[i]], decreasing=T),]
n<-ceiling(nrow(df1)*0.01) #top1% region
df1$top1<-"N"
df1$top1[1:n]<-"PWS"
df2<-cov.list[["TB_100k"]]
df2<-df2[order(df2[,cv[i]], decreasing=T),]
df2$top1<-"N"
df2$top1[1:n]<-"TB"
co<-rbind(df1, df2)
co$chrom<-factor(co$chrom, levels=paste0("chr", 1:26))
colnames(co)[which(colnames(co)==cv[i])]<-"cov"
#assgin colors
co$top1<-apply(co, 1, function(x) {ifelse (x['top1']=="N", x['color'], x['top1'])} )
co$top1<-factor(co$top1, levels=c("PWS","TB","col1","col2"))
co$time<-cv[i]
Cov<-rbind(Cov, co[,c("index", "cov","top1","time")])
}
#count the number of sites per chromosomes
df1<-cov.list[["PWS_100k"]]
poss<-data.frame(chr=paste0("chr",1:26))
k=1
for (j in 1:26){
df<-df1[df1$chr==paste0("chr",j),]
poss$start[j]<-k
poss$end[j]<-k+nrow(df)-1
k=k+nrow(df)
}
poss$x<-poss$start+(poss$end-poss$start)/2
ymax<-max(co$cov, na.rm=T)
ggplot(Cov, aes(x=index, y=cov, color=top1))+
facet_wrap(~time, ncol=1)+
geom_point(size=0.5)+
theme_classic()+ylim(-0.1,ymax)+
scale_color_manual(values=c("#FF3293B3","#0433FFB3" ,"#ADD8E680","#C0C0C088"), labels=c("PWS", "TB", "",""))+
ylab("Covariance")+
guides(color = guide_legend(override.aes = list(color=c("deeppink","#0433FF","white","white"), size=2), title=element_text("Outlier (1%)", size=10)))+
scale_x_continuous(name="Chromosome", breaks=poss$x, labels=1:26)
ggsave(paste0("../Output/COV/COVscan_3pop/PWS_TB_100k_Window_Outliers.png"), width = 11, height = 5, dpi=300)
}}cv<-c("cov12","cov13","cov23")
winsize<-c("50k","75k","100k")
cvrange<-data.frame(pop=c(paste0(pops[1:2],"_", cv[1]),paste0(pops[1:2],"_", cv[2]),paste0(pops,"_", cv[3])))
k=1
for (i in 1:length(cv)){
if (i==1|i==2){
if (i==1) k=1
if(i==2) k=3
for (w in 1: length(winsize)){
#PWS
df1<-cov.list[[paste0("PWS_", winsize[w])]]
df1<-df1[order(df1[,cv[i]], decreasing=T),]
n<-ceiling(nrow(df1)*0.01) #top1% region
df1$top1<-"N"
df1$top1[1:n]<-"PWS"
rg<-range(df1[df1$top1=="PWS",cv[i]], na.rm=T)
cvrange[k,paste0(winsize[w])]<-paste0(rg[1],"-",rg[2])
#tb
df2<-cov.list[[paste0("TB_", winsize[w])]]
df2<-df2[order(df2[,cv[i]], decreasing=T),]
df2$top1<-"N"
df2$top1[1:n]<-"TB"
rg2<-range(df2[df2$top1=="TB", cv[i]], na.rm=T)
cvrange[(k+1),paste0(winsize[w])]<-paste0(rg2[1],"-",rg2[2])
}
}
if (i==3){
k=5
for (w in 1: length(winsize)){
#pws
df1<-cov.list[[paste0("PWS_", winsize[w])]]
df1<-df1[,c("chrom","start","end","cov23")]
df1<-df1[order(df1$cov23, decreasing=T),]
n<-ceiling(nrow(df1)*0.01) #top1% region
df1$top1<-"N"
df1$top1[1:n]<-"PWS"
rg<-range(df1[df1$top1=="PWS",cv[i]], na.rm=T)
cvrange[k,paste0(winsize[w])]<-paste0(rg[1],"-",rg[2])
#tb
df2<-cov.list[[paste0("TB_", winsize[w])]]
df2<-df2[,c("chrom","start","end","cov23")]
df2<-df2[order(df2$cov23, decreasing=T),]
df2$top1<-"N"
df2$top1[1:n]<-"TB"
rg2<-range(df2[df2$top1=="TB", cv[i]], na.rm=T)
cvrange[(k+1),paste0(winsize[w])]<-paste0(rg2[1],"-",rg2[2])
#ss
df3<-cov.list[[paste0("SS_", winsize[w])]]
df3<-df3[,c("chrom","start","end","cov23")]
df3<-df3[order(df3$cov23, decreasing=T),]
df3$top1<-"N"
df3$top1[1:n]<-"SS"
rg3<-range(df3[df3$top1=="SS", cv[i]], na.rm=T)
cvrange[(k+2),paste0(winsize[w])]<-paste0(rg3[1],"-",rg3[2])
}
}
}
write.csv(cvrange, "../Output/COV/COVscan_3pop/COVranges_check.csv")
cvs<-melt(cvrange, id.vars = "pop")
cvs<-cvs %>%
separate(value, c("low", "high"), "-")
cvs$low<-as.numeric(cvs$low)
cvs$high<-as.numeric(cvs$high)
cvs<-cvs%>%
separate(pop, c("pop", "cov"), "_")
ggplot(cvs, aes(x=cov, y=high, fill=pop))+
facet_wrap(~variable)+
geom_crossbar(aes(ymin=low, ymax=high), width=0.5, position=position_dodge(width = 1))+
ylab("Range of covariances")+
theme_light()+
geom_vline(xintercept=c(1.5,2.5), color="gray")+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggsave("../Output/COV/COVscan_3pop/TempCov_Range_comparison.png", width = 7, height = 3, dpi=300)cv<-c("cov12","cov13","cov23")
winsize<-"100k"
#cvrange<-data.frame(pop=c(paste0(pops[1:2],"_", cv[1]),paste0(pops[1:2],"_", cv[2]),paste0(pops,"_", cv[3])))
k=1
w=1
for (c in 1:length(cv)){
if (c==1|c==2){
#PWS
df1<-cov.list[[paste0("PWS_", winsize[w])]]
df1<-df1[order(df1[,cv[c]], decreasing=T),]
n<-ceiling(nrow(df1)*0.01) #top1% region
df1$pop<-"PWS"
#tb
df2<-cov.list[[paste0("TB_", winsize[w])]]
df2<-df2[order(df2[,cv[c]], decreasing=T),]
df2$pop<-"TB"
covs<-rbind(df1, df2)
colnames(covs)[colnames(covs)==cv[c]]<-"cov"
#remove one extreme
covs<-covs[covs$cov>-0.3,]
#create a table of numbers of loci in the cov bins
counts<-data.frame(pop=c("PWS","TB"))
bins<-seq(-0.05, 0.05,0.01)
for (i in 1: length(bins)){
if (i==1){
counts[1,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov<bins[i]&covs$pop=="PWS"])
counts[2,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov<bins[i]&covs$pop=="TB"])
}
if (i>1&i<11){
counts[1,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i-1]&covs$cov<bins[i]&covs$pop=="PWS"])
counts[2,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i-1]&covs$cov<bins[i]&covs$pop=="TB"])
}
if(i==11){
counts[1,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i]&covs$pop=="PWS"])
counts[2,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i]&covs$pop=="TB"])
}
}
counts2<-data.frame(t(counts))
colnames(counts2)<-counts2[1,]
counts2<-counts2[-1,]
counts2$bin<-gsub("bin_","",rownames(counts2))
countm<-melt(counts2, id.vars="bin")
countm$value<-as.integer(countm$value)
countm$bin<-as.numeric(countm$bin)
ggplot(countm, aes(x=as.factor(bin), y=value, fill=variable))+
geom_bar(position=position_dodge(width = 0.8), stat="identity", width = 0.8)+
scale_fill_manual(values=cols)+
xlab("Covariance")+ylab("Count")+
theme(legend.title = element_blank())
ggsave(paste0("../Output/COV/COVscan_3pop/",cv[c],"_coutns_PWS_TB.png"), width = 6,height=4, dpi=300)
ggplot(covs, aes(x=cov, fill=pop))+
geom_histogram(data=subset(covs, pop=="PWS"),color="gray60", alpha = 0.5,fill=cols[2], bins=100)+
geom_histogram(data=subset(covs, pop=="TB"), color="gray60", alpha = 0.5,fill=cols[1], bins=100)+
geom_vline(xintercept = mean(covs$cov[covs$pop=="PWS"], na.rm=T), color=cols[2])+
geom_vline(xintercept = mean(covs$cov[covs$pop=="TB"], na.rm=T), color=cols[1])+
xlim(-0.1,0.1)+
annotate("rect", xmin=0.05, xmax=0.053,ymax=700,ymin=680, alpha=0.8, fill=cols[2])+
annotate("rect", xmin=0.05, xmax=0.053,ymax=660,ymin=640, alpha=0.8, fill=cols[1])+
geom_text(label="PWS", x= 0.058, y=700, size=3.5, color="gray20",vjust=1,hjust=0)+
geom_text(label="TB", x= 0.058, y=660, size=3.5, color="gray20",vjust=1, hjust=0)
ggsave(paste0("../Output/COV/COVscan_3pop/",cv[c],"_histgram_PWS_TB.png"), width = 6,height=4, dpi=300)
}
if (c==3){
#PWS
df1<-cov.list[[paste0("PWS_", winsize[w])]]
df1<-df1[,c("chrom","start","end","cov23")]
df1$pop<-"PWS"
#tb
df2<-cov.list[[paste0("TB_", winsize[w])]]
df2<-df2[,c("chrom","start","end","cov23")]
df2$pop<-"TB"
#ss
df3<-cov.list[[paste0("SS_", winsize[w])]]
df3<-df3[,c("chrom","start","end","cov23")]
df3$pop<-"SS"
covs<-rbind(df1,df2,df3)
colnames(covs)[colnames(covs)==cv[c]]<-"cov"
#remove one extreme
covs<-covs[covs$cov>-0.3,]
#create a table of numbers of loci in the cov bins
counts<-data.frame(pop=pops)
bins<-seq(-0.05, 0.05,0.01)
for (i in 1: length(bins)){
if (i==1){
counts[1,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov<bins[i]&covs$pop=="PWS"])
counts[2,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov<bins[i]&covs$pop=="TB"])
counts[3,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov<bins[i]&covs$pop=="SS"])
}
if (i>1&i<11){
counts[1,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i-1]&covs$cov<bins[i]&covs$pop=="PWS"])
counts[2,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i-1]&covs$cov<bins[i]&covs$pop=="TB"])
counts[3,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i-1]&covs$cov<bins[i]&covs$pop=="SS"])
}
if(i==11){
counts[1,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i]&covs$pop=="PWS"])
counts[2,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i]&covs$pop=="TB"])
counts[3,paste0("bin_",bins[i])]<-length(covs$cov[covs$cov>=bins[i]&covs$pop=="SS"])
}
}
counts2<-data.frame(t(counts))
colnames(counts2)<-counts2[1,]
counts2<-counts2[-1,]
counts2$bin<-gsub("bin_","",rownames(counts2))
countm<-melt(counts2, id.vars="bin")
countm$value<-as.integer(countm$value)
countm$bin<-as.numeric(countm$bin)
ggplot(countm, aes(x=as.factor(bin), y=value, fill=variable))+
geom_bar(position=position_dodge(width = 0.8), stat="identity", width = 0.8)+
scale_fill_manual(values=cols)+
xlab("Covariance")+ylab("Count")+
theme(legend.title = element_blank())
ggsave(paste0("../Output/COV/COVscan_3pop/",cv[c],"_coutns_3pops.png"), width = 6,height=4, dpi=300)
plots<-list()
for (j in 1:3){
if (j==1) color_name<-cols[2]
if (j==2) color_name<-cols[1]
if (j==3) color_name<-cols[3]
plots[[j]]<- ggplot(covs, aes(x=cov, fill=pop))+
geom_histogram(data=subset(covs, pop==pops[j]),color="gray60", alpha = 0.5,fill=color_name, bins=100)+
geom_vline(xintercept = mean(covs$cov[covs$pop==pops[j]], na.rm=T), color=color_name)+
xlim(-0.1,0.1)+ggtitle(pops[j])
}
{png(paste0("../Output/COV/COVscan_3pop/",cv[c],"_histgram_3pops.png"), height = 8, width = 4.6, res=300, units = "in")
do.call(grid.arrange, c(plots, ncol=1))
dev.off()}
}
}
# How window size affects outlier regions
#COV12
cv<-c("cov12","cov13","cov23")
winsize<-c("50k","75k","100k")
pairs<-t(combn(winsize, 2))
pairs<-data.frame(pairs)
colnames(pairs)<-paste0("window",1:2)
Ov1<-pairs
Ov2<-pairs
Ov3<-pairs
populations<-c("PWS","TB")
for (i in 1:length(cv)){
df<-read.csv(paste0("../Output/COV/3pops_top1percent_outlier_regions.", cv[i], "_new.csv"))
df$id<-paste0(df$chrom,"_",df$start)
pws<-df[df$pop=="PWS",]
tb<-df[df$pop=="TB",]
if (i==3) ss<-df[df$pop=="SS",]
for(j in 1:nrow(pairs)){
isec<-intersect(pws$id[pws$window==pairs[j,1]], pws$id[pws$window==pairs[j,2]])
Ov1[j, cv[i]]<-length(isec)
isec2<-intersect(tb$id[tb$window==pairs[j,1]], tb$id[tb$window==pairs[j,2]])
Ov2[j, cv[i]]<-length(isec2)
if (i==3) {
isec3<-intersect(ss$id[ss$window==pairs[j,1]], ss$id[ss$window==pairs[j,2]])
Ov3[j, cv[i]]<-length(isec3)
}
#### Check chromosome region overlap +-200,000 bases
for(p in 1:length(pops)){
if (p==1) df_pop<-pws
if (p==2) df_pop<-tb
if (p==3 & exists("ss")) {df_pop<-ss}
else next
time1<-df_pop[df_pop$window==pairs[j,1],]
time2<-df_pop[df_pop$window==pairs[j,2],]
overlps<-data.frame()
overlps2<-data.frame()
for (n in 1: nrow(time1)){
re<-time2[time2$chrom==time1$chrom[n],]
if (nrow(re)>=1){
for (s in 1: nrow(re)){
if (re$start[s]<=time1$start[n]+200000 & re$start[s]>=time1$start[n]-200000){
overlps<-rbind(overlps, re[s,])
overlps2<-rbind(overlps2,time1[n,])}
}}
}
#Overlapping windows:
ov<-data.frame(id=overlps$id)
for (n in 1: nrow(overlps)){
if (overlps$start[n]<overlps2$start[n]) {ov$start[n]<-overlps$start[n]; ov$end[n]<-overlps2$end[n]}
if (overlps$start[n]>=overlps2$start[n]) {ov$start[n]<-overlps2$start[n];ov$end[n]<-overlps$end[n]}
}
ov[,paste0("cov.",pairs[j,1])]<-overlps[,4]
ov[,paste0("cov.",pairs[j,2])]<-overlps2[,4]
write.csv(ov, paste0("../Output/COV/COVscan_3pop/Overlap_regions_",pops[p],".", pairs[j,1],".",pairs[j,2], ".",cv[i], "_plusminus200k.csv"), row.names = F)
}
}
}
write.csv(Ov1, paste0("../Output/COV/COVscan_3pop/Overlapping_window_counts_PWS.csv"))
write.csv(Ov2, paste0("../Output/COV/COVscan_3pop/Overlapping_window_counts_TB.csv"))
write.csv(Ov3, paste0("../Output/COV/COVscan_3pop/Overlapping_window_counts_SS.csv"))#100k
cv<-c("cov12","cov13","cov23")
pairs<-t(combn(pops, 2))
pairs<-data.frame(pairs)
colnames(pairs)<-paste0("pop",1:2)
Ov_direct<-data.frame(cov=c(cv[1:2],"cov23-PT","cov23-PS","cov23-ST" ,"cov23-3"))
Ov_300<-data.frame(cov=c(cv[1:2],"cov23-PT","cov23-PS","cov23-ST" ,"cov23-3"))
for (i in 1:length(cv)){
df<-read.csv(paste0("../Output/COV/COVscan_3pop/3pops_top1percent_outlier_regions.", cv[i], "_new.csv"))
df<-df[df$window=="100k",]
df$id<-paste0(df$chrom,"_",df$start)
if (i!=3){
#exact overlaps
isec<-intersect(df$id[df$pop=="PWS"], df$id[df$pop=="TB"])
Ov_direct$count[i]<-length(isec)
#### Check chromosome region overlap +-200,000 bases
pop1<-df[df$pop=="PWS",]
pop2<-df[df$pop=="TB",]
overlps<-data.frame()
overlps2<-data.frame()
for (n in 1: nrow(pop1)){
re<-pop2[pop2$chrom==pop1$chrom[n],]
if (nrow(re)>=1){
for (s in 1: nrow(re)){
if (re$start[s]<=pop1$start[n]+300000 & re$start[s]>=pop1$start[n]-300000){
overlps<-rbind(overlps, re[s,])
overlps2<-rbind(overlps2,pop1[n,])}
}
}
}
# Merge two tables into one summary overlap table:
ov<-data.frame(id=overlps$id)
for (n in 1: nrow(overlps)){
if (overlps$start[n]<overlps2$start[n]) {ov$start[n]<-overlps$start[n]; ov$end[n]<-overlps2$end[n]}
if (overlps$start[n]>=overlps2$start[n]) {ov$start[n]<-overlps2$start[n];ov$end[n]<-overlps$end[n]}
}
ov[,"cov.PWS"]<-overlps[,4]
ov[,"cov.TB"]<-overlps2[,4]
write.csv(ov, paste0("../Output/COV/COVscan_3pop/Overlap_regions_",cv[i],"_plusminus300k.csv"), row.names = F)
Ov_300$count[i]<-nrow(ov)
}
if (i==3){
isec<-intersect(df$id[df$pop=="PWS"], df$id[df$pop=="TB"])
isec2<-intersect(df$id[df$pop=="PWS"], df$id[df$pop=="SS"])
isec3<-intersect(df$id[df$pop=="SS"], df$id[df$pop=="TB"])
Ov_direct$count[i]<-length(isec)
Ov_direct$count[i+1]<-length(isec2)
Ov_direct$count[i+2]<-length(isec)
Ov_direct$count[i+3]<-length(intersect(df$id[df$pop=="SS"], intersect(df$id[df$pop=="PWS"], df$id[df$pop=="TB"])))
for(j in 1:nrow(pairs)){
#### Check chromosome region overlap +-200,000 bases
pop1<-df[df$pop==pairs[j,1],]
pop2<-df[df$pop==pairs[j,2],]
overlps<-data.frame()
overlps2<-data.frame()
for (n in 1: nrow(pop1)){
re<-pop2[pop2$chrom==pop1$chrom[n],]
if (nrow(re)>=1){
for (s in 1: nrow(re)){
if (re$start[s]<=pop1$start[n]+300000 & re$start[s]>=pop1$start[n]-300000){
overlps<-rbind(overlps, re[s,])
overlps2<-rbind(overlps2,pop1[n,])}
}
}
}
# Merge two tables into one summary overlap table:
ov<-data.frame(id=overlps$id)
for (n in 1: nrow(overlps)){
if (overlps$start[n]<overlps2$start[n]) {ov$start[n]<-overlps$start[n]; ov$end[n]<-overlps2$end[n]}
if (overlps$start[n]>=overlps2$start[n]) {ov$start[n]<-overlps2$start[n];ov$end[n]<-overlps$end[n]}
}
ov[,paste0("cov.",pairs[j,1])]<-overlps[,4]
ov[,paste0("cov.",pairs[j,2])]<-overlps2[,4]
ov<-ov[!duplicated(ov),]
write.csv(ov, paste0("../Output/COV/COVscan_3pop/Overlap_regions_",cv[i],"_",pairs[j,1],".", pairs[j,2],"_plusminus300k.csv"), row.names = F)
Ov_300$count[i+j-1]<-nrow(ov)
}
}
}
write.csv(Ov_direct, paste0("../Output/COV/COVscan_3pop/DDirect_Overlapping_regions_counts_3pop_summary.csv"))
Ov_300$count[6]<-NA
write.csv(Ov_300, paste0("../Output/COV/COVscan_3pop/Overlapping_regions_counts_3pop_plusMinus300k.csv"))Create VCF files with selected regions & run snpEff
cat(dfp)Error in cat(dfp) : argument 1 (type 'list') cannot be handled by 'cat'
## Create summary files of snpEff results (gene annotations in the regions of interest) and reformat as a ShinyGo input
#create gene list
gfiles<-list.files("../Output/COV/COVscan_3pop/", pattern="genes.txt")
for (i in 1:length(gfiles)){
df<-read.table(paste0("../Output/COV/COVscan_3pop/",gfiles[i]), sep="\t")
df<-df[,1:7]
colnames(df)<-c("GeneName","GeneId","TranscriptId","BioType","variants_impact_HIGH","variants_impact_LOW", "variants_impact_MODERATE")
fname<-gsub(".genes.txt","",gfiles[i])
genes<-unique(df$GeneId)
sink(paste0("../Output/COV/COVscan_3pop/geneIDlist_",fname,".txt"))
cat(paste0(genes,"; "))
sink(NULL)
}
#Annotation infor from SnpEff
for (c in 1:3){
if (c!=3){
for (p in 1:2){
ano<-read.table(paste0("../Output/COV/COVscan_3pop/",pops[p],"_outliers_",cv[c],"_annotation"), header = F)
annotations<-data.frame()
for (i in 1: nrow(ano)){
anns<-unlist(strsplit(ano$V4[i], "\\,|\\|"))
annm<-data.frame(matrix(anns,ncol = 16, byrow = TRUE))
annm<-annm[,c(2,3,4,5,8)]
colnames(annm)<-c("Effect","Putative_impact","Gene_name","Gene_ID","Feature type")
annm<-annm[!duplicated(annm), ]
annm$chr<-ano$V1[i]
annm$chr<-ano$V1[i]
annm$pos<-ano$V2[i]
annm$AF<- ano$V3[i]
annotations<-rbind(annotations, annm)
}
annotations<-annotations[,c(6:8,1:5)]
annotations<-annotations[!duplicated(annotations[,1:2]),]
write.csv(annotations, paste0("../Output/COV/COVscan_3pop/Genes_",pops[p],"_outliers_100k_",cv[c],".csv"), row.names = F)
}
}
if (c==3){
for (p in 1:3){
ano<-read.table(paste0("../Output/COV/COVscan_3pop/",pops[p],"_outliers_",cv[c],"_annotation"), header = F)
annotations<-data.frame()
for (i in 1: nrow(ano)){
anns<-unlist(strsplit(ano$V4[i], "\\,|\\|"))
annm<-data.frame(matrix(anns,ncol = 16, byrow = TRUE))
annm<-annm[,c(2,3,4,5,8)]
colnames(annm)<-c("Effect","Putative_impact","Gene_name","Gene_ID","Feature type")
annm<-annm[!duplicated(annm), ]
annm$chr<-ano$V1[i]
annm$chr<-ano$V1[i]
annm$pos<-ano$V2[i]
annm$AF<- ano$V3[i]
annotations<-rbind(annotations, annm)
}
annotations<-annotations[,c(6:8,1:5)]
annotations<-annotations[!duplicated(annotations[,1:2]),]
write.csv(annotations, paste0("../Output/COV/COVscan_3pop/Genes_",pops[p],"_outliers_100k_",cv[c],".csv"), row.names = F)
}
}
}length(common)[1] 6
| time | PWS | TB | SS | common_PWS.TB | common_PWS.SS | common_SS.TB | common3 |
|---|---|---|---|---|---|---|---|
| cov12 | 568 | 635 | NA | 89 | NA | NA | NA |
| cov13 | 586 | 643 | NA | 80 | NA | NA | NA |
| cov23 | 495 | 628 | 417 | 64 | 80 | 30 | 10 |
## Check chromosome overlappws1<-read.csv("../Output/COV/COVscan_3pop/3pops_top1percent_outlier_regions.cov12_new.csv")
pws1<-pws1[pws1$pop=="PWS" & pws1$window=="100k",]
pws2<-read.csv("../Output/COV/3pops_top1percent_outlier_regions.cov12.csv")
pws2<-pws2[pws2$pop=="PWS"&pws2$window=="100k",]
pws1$vcf<-"3Pops"
pws2$vcf<-"PH"
pws<-rbind(pws1[,c("chrom","start","end","cov12","vcf")],pws2[,c("chrom","start","end","cov12","vcf")])
pws$chrom<-factor(pws$chrom, levels=paste0("chr", 1:26))
ggplot(data=pws,aes(x=start/1000000, y=cov12, color=vcf, fill=vcf))+
facet_wrap(~chrom)+
geom_point(position=position_dodge(width = 0.7), alpha=0.5)+xlab("Position (Mb)")+
ggtitle("PWS COV12")
ggsave("../Output/COV/COVscan_3pop/PWS_PH.vs.3Pops_outlier_overlap_cov12.png", width = 10, height = 8, dpi=300)pws1<-read.csv("../Output/COV/COVscan_3pop/3pops_top1percent_outlier_regions.cov23_new.csv")
pws1<-pws1[pws1$pop=="PWS" & pws1$window=="100k",]
pws2<-read.csv("../Output/COV/3pops_top1percent_outlier_regions.cov23.csv")
pws2<-pws2[pws2$pop=="PWS"&pws2$window=="100k",]
pws1$vcf<-"PH"
pws2$vcf<-"3Pops"
pws<-rbind(pws1[,c("chrom","start","end","cov23","vcf")],pws2[,c("chrom","start","end","cov23","vcf")])
pws$chrom<-factor(pws$chrom, levels=paste0("chr", 1:26))
ggplot(data=pws,aes(x=start/1000000, y=cov23, color=vcf, fill=vcf))+
facet_wrap(~chrom)+
geom_point(position=position_dodge(width = 0.7), alpha=0.5)+xlab("Position (Mb)")+
ggtitle("PWS COV23")
ggsave("../Output/COV/COVscan_3pop/PWS_PH.vs.3Pops_outlier_overlap_cov23.png", width = 10, height = 8, dpi=300)pws1<-read.csv("../Output/COV/COVscan_3pop/3pops_top1percent_outlier_regions.cov13_new.csv")
pws1<-pws1[pws1$pop=="PWS" & pws1$window=="100k",]
pws2<-read.csv("../Output/COV/3pops_top1percent_outlier_regions.cov13.csv")
pws2<-pws2[pws2$pop=="PWS"&pws2$window=="100k",]
pws1$vcf<-"PH"
pws2$vcf<-"3Pops"
pws<-rbind(pws1[,c("chrom","start","end","cov13","vcf")],pws2[,c("chrom","start","end","cov13","vcf")])
pws$chrom<-factor(pws$chrom, levels=paste0("chr", 1:26))
ggplot(data=pws,aes(x=start/1000000, y=cov13, color=vcf, fill=vcf))+
facet_wrap(~chrom)+
geom_point(position=position_dodge(width = 0.7), alpha=0.5)+xlab("Position (Mb)")+
ggtitle("PWS COV13")
ggsave("../Output/COV/COVscan_3pop/PWS_PH.vs.3Pops_outlier_overlap_cov13.png", width = 10, height = 8, dpi=300)### Interpopulation comparisons
#decode the samples to create the right matrix
cv<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_Covs_interPopulations_100k_3pops.csv", header = F)
labs<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_Covs_interPopulations_100k_labels_3pops.csv" )
labs<-labs[,-1]
cvm<-data.frame(label=as.vector(t(labs)), cov=as.vector(t(cv)))
#rearrange based on comparions: covariance between populations within the same period
#PopYr Symbols
# PH 1 'PWS', 1991
# PH 2 'PWS', 1996
# PH 3 'PWS', 2006
# PH 4 'PWS', 2017
# PH 5 'SS', 1991
# PH 6 'SS', 1996
# PH 7 'SS', 2006
# PH 8 'SS', 2017
# PH 9 'TB', 1991
# PH 10'TB', 1996
# PH 11'TB', 2006
# PH 12'TB', 2017
Covs<-data.frame(pops=rep(c("PWS.vs.SS", "PWS.vs.TB", "SS.vs.TB"), times=6),
period=c(rep("1991-1996", times=3),rep("1996-2006", times=3), rep("2006-2017", times=3)))
Covs$cov<-c(NA, cvm$cov[cvm$label=="cov(PH: 2-1, PH: 10-9)"],NA,
cvm$cov[cvm$label=="cov(PH: 3-2, PH: 7-6)"],cvm$cov[cvm$label=="cov(PH: 3-2, PH: 11-10)"],
cvm$cov[cvm$label=="cov(PH: 7-6, PH: 11-10)"],
cvm$cov[cvm$label=="cov(PH: 4-3, PH: 8-7)"],cvm$cov[cvm$label=="cov(PH: 4-3, PH: 12-11)"],cvm$cov[cvm$label=="cov(PH: 8-7, PH: 12-11)"])
#C.I.
cis<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_COV_Interpop_comparison_CIs.csv")
cis<-cis[,-1]
cim<-data.frame(label=as.vector(t(labs)), ci_l=as.vector(t(cis[1:11,])))
cim$ci_h<-as.vector(t(cis[12:22,]))
Covs$ci_l<-as.numeric(c(NA,cim$ci_l[cim$label=="cov(PH: 2-1, PH: 10-9)"],NA,
cim$ci_l[cim$label=="cov(PH: 3-2, PH: 7-6)"],cim$ci_l[cim$label=="cov(PH: 3-2, PH: 11-10)"], cim$ci_l[cim$label=="cov(PH: 7-6, PH: 11-10)"],
cim$ci_l[cim$label=="cov(PH: 4-3, PH: 8-7)"],cim$ci_l[cim$label=="cov(PH: 4-3, PH: 12-11)"], cim$ci_l[cim$label=="cov(PH: 8-7, PH: 12-11)"]))
Covs$ci_h<-as.numeric(c(NA, cim$ci_h[cim$label=="cov(PH: 2-1, PH: 10-9)"],NA,
cim$ci_h[cim$label=="cov(PH: 3-2, PH: 7-6)"],cim$ci_h[cim$label=="cov(PH: 3-2, PH: 11-10)"], cim$ci_h[cim$label=="cov(PH: 7-6, PH: 11-10)"],
cim$ci_h[cim$label=="cov(PH: 4-3, PH: 8-7)"],cim$ci_h[cim$label=="cov(PH: 4-3, PH: 12-11)"], cim$ci_h[cim$label=="cov(PH: 8-7, PH: 12-11)"]))
ggplot(Covs, aes(x=period, y=cov, fill=pops))+
geom_bar(stat="identity",position=position_dodge(width = 0.7), width=0.8)+
ylab("Covariance")+xlab('')+theme_classic()+
geom_hline(yintercept = 0,color="gray70", size=0.3)+
scale_fill_manual(values=cols[c(2,1,3)])+
theme(legend.title = element_blank())+
scale_y_continuous(labels = comma)+
ylim(-0.0013, 0.002)+
geom_errorbar(aes(ymin=ci_l, ymax=ci_h), width=.2, size=.2, position=position_dodge(width = 0.7))
ggsave("../Output/COV/Interpop_cov_comparison_3Pops_new.png",width = 4.7, height = 3, dpi=300)
# point plot
ggplot(Covs, aes(x=period, y=cov, color=pops))+
geom_point(position=position_dodge(width = 0.7), size=4)+
ylab("Covariance")+xlab('')+theme_classic()+
geom_hline(yintercept = 0,color="gray70", size=0.3)+
scale_color_manual(values=cols[c(2,1,3)])+
theme(legend.title = element_blank())+
scale_y_continuous(labels = comma)+
geom_errorbar(aes(ymin=ci_l, ymax=ci_h), width=.2, size=.2, position=position_dodge(width = 0.7))+
ylim(-0.0013, 0.002)+
geom_vline(xintercept = c(1.5,2.5), color="gray", size=0.3)
ggsave("../Output/COV/Interpop_cov_comparison_3Pops_new_PointPlot.png",width = 4.7, height = 3, dpi=300)## Longer time-period
Covs2<-data.frame(pops=rep(c("PWS.vs.SS", "PWS.vs.TB", "SS.vs.TB"), times=3),
period=c(rep("1991-2006", times=3),rep("1991-2017", times=3),rep("1996-2017", times=3)))
cv1<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_Covs_interPopulations_100k_1991-2006.csv", header = F)
labs1<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_Covs_interPopulations_100k_labels_1991-2006.csv" )
labs1<-labs1[,-1]
cvm1<-data.frame(label=as.vector(t(labs1)), cov=as.vector(t(cv1)))
cv2<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_Covs_interPopulations_100k_1991-2017.csv", header = F)
labs2<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_Covs_interPopulations_100k_labels_1991-2017.csv" )
labs2<-labs2[,-1]
cvm2<-data.frame(label=as.vector(t(labs2)), cov=as.vector(t(cv2)))
cv3<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_Covs_interPopulations_100k_1996-2017.csv", header = F)
labs3<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_Covs_interPopulations_100k_labels_1996-2017.csv" )
labs3<-labs3[,-1]
cvm3<-data.frame(label=as.vector(t(labs3)), cov=as.vector(t(cv3)))
Covs2$cov<-c(NA, cvm1$cov[cvm1$label=="cov(PH: 2-1, PH: 4-3)"], NA,
NA, cvm2$cov[cvm2$label=="cov(PH: 2-1, PH: 4-3)"], NA,
cvm3$cov[cvm3$label=="cov(PH: 2-1, PH: 4-3)"], cvm3$cov[cvm3$label=="cov(PH: 2-1, PH: 6-5)"], cvm3$cov[cvm3$label=="cov(PH: 4-3, PH: 6-5)"])
#C.I.
cis1<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_COV_Interpop_comparison_CIs_1991-2006.csv")
cis1<-cis1[,-1]
cis2<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_COV_Interpop_comparison_CIs_1991-2017.csv")
cis2<-cis2[,-1]
cis3<-read.csv("~/Projects/Pacherring_Vincent/MD7000/GW_COV_Interpop_comparison_CIs_1996-2017.csv")
cis3<-cis3[,-1]
#cim<-data.frame(label=as.vector(t(labs)), ci_l=as.vector(t(cis1[1:4,])))
#cim$ci_h<-as.vector(t(cis[12:22,]))
Covs2$ci_l<-as.numeric(c(NA,cis1[1,3],NA,
NA,cis2[1,3],NA,
cis3[1,3],cis3[1,5],cis3[3,5]))
Covs2$ci_h<-as.numeric(c(NA,cis1[4,3],NA,
NA,cis2[4,3],NA,
cis3[6,3],cis3[6,5],cis3[8,5]))
ggplot(Covs2, aes(x=period, y=cov, fill=pops))+
geom_bar(stat="identity",position=position_dodge(width = 0.7), width=0.8)+
ylab("Covariance")+xlab('')+theme_classic()+
geom_hline(yintercept = 0,color="gray70", size=0.3)+
scale_fill_manual(values=cols[c(2,1,3)])+
theme(legend.title = element_blank())+scale_y_continuous(labels = comma)+
ylim(-0.0013, 0.002)+
geom_errorbar(aes(ymin=ci_l, ymax=ci_h), width=.2, size=.2, position=position_dodge(width = 0.7))
ggsave("../Output/COV/Interpop_cov_comparison_3Pops_LonogerPeriod.png",width = 4.7, height = 3, dpi=300)
ggplot(Covs2, aes(x=period, y=cov, color=pops))+
geom_point(position=position_dodge(width = 0.7), size=4)+
ylab("Covariance")+xlab('')+theme_classic()+
geom_hline(yintercept = 0,color="gray70", size=0.3)+
scale_color_manual(values=cols[c(2,1,3)])+
theme(legend.title = element_blank())+
scale_y_continuous(labels = comma)+
ylim(-0.0013, 0.002)+
geom_errorbar(aes(ymin=ci_l, ymax=ci_h), width=.2, size=.2, position=position_dodge(width = 0.7))+
geom_vline(xintercept = c(1.5,2.5), color="gray", size=0.3)
ggsave("../Output/COV/Interpop_cov_comparison_3PopsLonogerPeriod_PointPlot.png",width = 4.7, height = 3, dpi=300)pops<-c("PWS91","PWS96","PWS07","PWS17")
yr<-c(1991,1996,2007,2017)
maf<-data.frame()
for (i in 1:4){
af<-read.table(paste0("../Data/new_vcf/AF/",pops[i],".mafs"),sep="\t", header = T)
af<-af[af$chromo=="chr13"&af$position>=23070000&af$position<=23080000,]
af$year<-yr[i]
maf<-rbind(maf,af)
}
#write.csv(maf,"../Output/COV/COVscan_3pop/AF_maf_chr13_23Mb.csv")
ggplot(maf, aes(x=year, y=knownEM, color=factor(position)))+
geom_point()+
geom_path()+ggtitle("MAF (ANGSD)")+
ylab("maf")+
theme(legend.title=element_blank())
ggsave("../Output/COV/COVscan/AF_ch13_23.07-23.08_angsd.png", width = 5, height=3, dpi=300)
AF<-data.frame()
for (i in 1:4){
af<-read.table(paste0("../Data/new_vcf/AF/",pops[i],"_maf05_af.frq"),header = FALSE, skip=1, col.names = c("chr","pos","n_allele","n_sample","MajorAF","MAF"))
af<-af[af$chr=="chr13"&af$pos>=23070000&af$pos<=23080000,]
af$year<-yr[i]
af$maf<-substr(af$MAF, 3,10)
af$maf<-as.numeric(af$maf)
af<-af[,c("chr","pos","maf","year")]
AF<-rbind(AF,af)
}
ggplot(AF, aes(x=year, y=maf, color=factor(pos)))+
geom_point()+
geom_path()+ggtitle("MAF (vcftools)")+
theme(legend.title=element_blank())
ggsave("../Output/COV/COVscan/AF_ch13_23.07-23.08.png", width = 5, height=3, dpi=300)
###TB
pops<-c("TB91","TB96","TB06","TB17","PWS91","PWS96","PWS07","PWS17","SS96","SS06","SS17")
yr<-c(1991,1996,2006,2017,1991,1996,2007,2017,1996,2006,2017)
maf<-data.frame()
for (i in 1:length(pops)){
af<-read.table(paste0("../Data/new_vcf/AF/",pops[i],".mafs"),sep="\t", header = T)
af<-af[af$chromo=="chr13"&af$position>=23070000&af$position<=23080000,]
af$year<-yr[i]
af$pop<-sub("\\d\\d","", pops[i])
maf<-rbind(maf,af)
}
#write.csv(maf,"../Output/COV/COVscan_3pop/AF_maf_chr13_23Mb.csv")
ggplot(maf, aes(x=year, y=knownEM, color=pop))+
facet_wrap(~factor(position))+
geom_point()+
geom_path()+ggtitle("Chr13 (rel gene)")+
ylab("maf")+
theme(legend.title=element_blank())
ggsave("../Output/COV/COVscan/AF_ch13_23.07-23.08_angsd.png", width = 5, height=3, dpi=300)